# 
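# Reference only: the commented-out code below wrote the two *_anon.rds inputs
# read further down, after dropping columns such as the voter ID, surname,
# street address and birth date from the raw files.
#
# k <- readRDS("temp/vf_mn_dist.rds") |> 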
#   filter(treated > 0)
# 
# lat <- readRDS("temp/vf_later_mn_dist.rds") |> 
#   mutate(treated = 0)
# 
# saveRDS(select(k, -LALVOTERID, -surname,
#                -street, -num, -rest, -Residence_Addresses_City,
#                -cong, -Voters_BirthDate), "temp/vf_mn_dist_anon.rds")
# 
# saveRDS(select(lat, -LALVOTERID, -surname,
#                -street, -Residence_Addresses_City,
#                -cong, -Voters_BirthDate), "temp/vf_later_mn_dist_anon.rds")

##########################################
##########################################
##########################################
##########################################

# Stack the two anonymised voter files into a single table.
k <- readRDS("temp/vf_mn_dist_anon.rds") |>
  bind_rows(readRDS("temp/vf_later_mn_dist_anon.rds"))

# Give every row a random, non-repeating integer id. The ids are drawn before
# any rows are filtered out, so they range over the full stacked table.
k <- mutate(ungroup(k), voter_id = sample(seq_len(n()), n()))

# Restrict the sample, rename the decedent's age, and put both dates on a
# numeric scale.
k <- k |>
  filter(
    # Keep rows with fewer than three household deaths (or no count at all).
    is.na(n_dead) | n_dead < 3,
    # Rows with treated == 0 are kept regardless of death_date; all other rows
    # need a death date on or before 2020-12-31.
    treated == 0 | death_date <= "2020-12-31"
  ) |>
  rename(deced_age = age) |>
  mutate(
    # Both dates become the number of days elapsed since 2010-01-01.
    # reg_date is converted with the "%m/%d/%Y" format first.
    reg_date = as.numeric(
      as.Date(reg_date, "%m/%d/%Y") - as.Date("2010-01-01")
    ),
    death_date = as.numeric(death_date - as.Date("2010-01-01"))
  )



# First comparison: rows with treated > 0 only. `t` marks the treated == 2
# rows (labelled "Household Covid Death" in the final plot of this script);
# the remaining treated == 1 rows are the ones that get reweighted.
first_set <- k |>
  filter(treated > 0) |>
  mutate(t = treated == 2)

# Keep only rows with no missing value in any of the listed columns. Every
# "pred." column is checked here, including the two left out of X below.
first_set <- first_set[
  first_set |>
    select(
      lat, lon, voter_age,
      reg_date,
      deced_age,
      death_date,
      n_dead,
      General_2012_11_06,
      General_2014_11_04,
      General_2016_11_08,
      General_2018_11_06,
      starts_with("pred."),
      male,
      median_income, some_college, pop_dens
    ) |>
    complete.cases(),
]

# Balancing covariates. pred.other and pred.aian are removed from the
# "pred." set before balancing.
X <- first_set |>
  select(
    lat, lon, voter_age,
    reg_date,
    deced_age,
    death_date,
    n_dead,
    General_2012_11_06,
    General_2014_11_04,
    General_2016_11_08,
    General_2018_11_06,
    starts_with("pred."),
    male,
    median_income, some_college, pop_dens,
    -pred.other, -pred.aian
  )

# Entropy balancing with `t` as the treatment indicator.
mb <- ebalance(first_set[["t"]], X)

# Rebuild the table with the t rows first (weight 1) followed by the !t rows
# carrying the balancing weights.
# NOTE(review): relies on mb$w holding one weight per !t row, in row order --
# confirm against the ebalance documentation.
first_set <- bind_rows(
  mutate(filter(first_set, t), weight = 1),
  mutate(filter(first_set, !t), weight = mb$w)
)

# Weighted mean of each General_* column by group; the election year is taken
# from characters 9-12 of the column name.
ll <- first_set |>
  group_by(t) |>
  summarize(across(starts_with("Gener"), \(x) weighted.mean(x, weight))) |>
  pivot_longer(starts_with("Gen")) |>
  mutate(name = as.integer(substr(name, 9, 12)))

# Same table using plain (unweighted) means. Not referenced again in the
# visible part of this script.
ll2 <- first_set |>
  group_by(t) |>
  summarize(across(starts_with("Gener"), \(x) mean(x))) |>
  pivot_longer(starts_with("Gen")) |>
  mutate(name = as.integer(substr(name, 9, 12)))

# Quick visual check of the weighted series.
ll |>
  ggplot(aes(x = name, y = value, color = t)) +
  geom_line()

###################################################

# Second comparison: every row whose `treated` is not 1. `t` again marks the
# treated == 2 rows; the remaining rows are the ones that get reweighted.
second_set <- k |>
  filter(treated != 1) |>
  mutate(t = treated == 2)

# Keep only rows with no missing value in any of the listed columns. Unlike
# the first comparison, death_date is not part of this list.
second_set <- second_set[
  second_set |>
    select(
      lat, lon, voter_age,
      reg_date,
      deced_age,
      n_dead,
      General_2012_11_06,
      General_2014_11_04,
      General_2016_11_08,
      General_2018_11_06,
      starts_with("pred."),
      male,
      median_income, some_college, pop_dens
    ) |>
    complete.cases(),
]

# Balancing covariates (no death_date); pred.other and pred.aian are removed
# from the "pred." set before balancing.
X <- second_set |>
  select(
    lat, lon, voter_age,
    reg_date,
    deced_age,
    n_dead,
    General_2012_11_06,
    General_2014_11_04,
    General_2016_11_08,
    General_2018_11_06,
    starts_with("pred."),
    male,
    median_income, some_college, pop_dens,
    -pred.other, -pred.aian
  )

# Entropy balancing with `t` as the treatment indicator.
mb <- ebalance(second_set[["t"]], X)

# Rebuild the table with the t rows first (weight 1) followed by the !t rows
# carrying the balancing weights.
# NOTE(review): relies on mb$w holding one weight per !t row, in row order --
# confirm against the ebalance documentation.
second_set <- bind_rows(
  mutate(filter(second_set, t), weight = 1),
  mutate(filter(second_set, !t), weight = mb$w)
)

# Weighted mean of each General_* column by group; the election year is taken
# from characters 9-12 of the column name.
ll <- second_set |>
  group_by(t) |>
  summarize(across(starts_with("Gener"), \(x) weighted.mean(x, weight))) |>
  pivot_longer(starts_with("Gen")) |>
  mutate(name = as.integer(substr(name, 9, 12)))

# Quick visual check of the weighted series.
ll |>
  ggplot(aes(x = name, y = value, color = t)) +
  geom_line()

##########################

# Long panel with one row per input row and General_* election column.
# It stacks all of first_set (treated 1 and 2) on the reweighted !t rows of
# second_set. The year is taken from characters 9-12 of the General_* column
# name; t1 / t2 flag treated > 0 and treated > 1 respectively.
full <- bind_rows(
  first_set,
  filter(second_set, !t)
) |>
  pivot_longer(
    starts_with("Gener"),
    names_to = "year",
    values_to = "turnout"
  ) |>
  mutate(
    year = as.integer(substring(year, 9, 12)),
    t1 = treated > 0,
    t2 = treated > 1
  )

# Weighted and unweighted mean turnout per treatment group and year, reshaped
# so that "pan" distinguishes the two versions (one facet each in the plot).
# .groups = "drop" returns an ungrouped table, so no leftover grouping by
# `treated` is carried into the mutate() below or into the saved file.
ll <- full |>
  group_by(treated, year) |>
  summarize(
    Weighted = weighted.mean(turnout, weight),
    Unweighted = mean(turnout),
    .groups = "drop"
  ) |>
  pivot_longer(
    cols = c(Weighted, Unweighted),
    names_to = "pan",
    values_to = "turnout"
  ) |>
  mutate(group = case_when(treated == 0 ~ "No Household Deaths",
                           treated == 1 ~ "Household Non-Covid Death",
                           treated == 2 ~ "Household Covid Death"))

saveRDS(ll, "temp/weighted_full_ll_mn_dist.rds")

# The plot is bound to a name so that ggsave() writes exactly this object
# instead of depending on whatever last_plot() happens to return.
# theme_bc() is not defined in this file.
# NOTE(review): the caption lists party affiliation / party and block group
# Covid death rates among the balancing covariates, but the covariate
# selections earlier in this script contain neither; they do contain the
# General_2012-2018 turnout columns, which the caption does not mention.
# Confirm the wording before publication.
turnout_plot <- ggplot(ll, aes(x = year, y = turnout, linetype = group, shape = group)) +
  geom_line() +
  geom_point() +
  facet_grid(. ~ pan) +
  theme_bc(legend.position = "bottom") +
  guides(linetype = guide_legend(title.position = "top", title.hjust = 0.5),
         shape = guide_legend(title.position = "top", title.hjust = 0.5)) +
  labs(x = "Year", y = "Turnout\n(share of voters registered in 2020)",
       linetype = "Treatment Group",
       shape = "Treatment Group",
       caption = "'No Household Deaths' are weighted to mirror 'Household Covid Death' using entropy balancing. Balancing covariates include decedent's age, number of household deaths, latitude, longitude, party affiliation, voter's age, registration date, BISG racial predictions, gender, party, block group median income, block group education, block group population density, and block group Covid death rates. 'Household Non-Covid Death' are weighted using the preceding covariates, along with decedent's date of death.") +
  scale_y_continuous(labels = scales::percent)

# Top-level reference so the plot is still displayed when the script is run
# with auto-printing, as the original bare ggplot() expression was.
turnout_plot

ggsave("temp/first_mn_dist.png", plot = turnout_plot,
       width = 6, height = 4.5, units = "in")

#############################

# Weighted regression of turnout on the full interaction of t1, t2 and a
# year == 2020 indicator, with year and voter_id fixed effects. Standard
# errors are clustered on voter_id and year; observation weights come from
# the `weight` column of `full`.
m1 <- fixest::feols(
  fml = turnout ~ t1 * t2 * I(year == 2020) | year + voter_id,
  data = full,
  cluster = c("voter_id", "year"),
  weights = full$weight
)

# Persist the fitted model.
saveRDS(m1, file = "temp/mn_model_1_dist.rds")

